The R code used to perform this analysis can be viewed in this report using the Code buttons to toggle code viewing. This HTML report is best viewed using a modern web browser such as Mozilla Firefox or Google Chrome. It is also available on the online repository. Printing is possible but will not produce an optimal reading experience.
For this project data from a large fast-food restaurant chain in the U.S.A. were provided. The data covered approximately 4 months from March until June 2015 in four restaurants: two in Berkeley, California, and two in New York, New York. The data provided transaction information (e.g. which menu items were purchased and in what quantities), ingredient lists for all menu items, and associated metadata (e.g. store location, store type etc).
The goal of the project was to forecast demand for lettuce for each of the four locations. The forecast period was two weeks following the end of the data (from 2015-06-16 to 2015-06-29).
# Set default knitr options: show code in the report, but suppress package
# start-up messages and warnings in the rendered output
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Load packages
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(forecast)
library(purrr)
library(knitr)
library(cowplot)
# Set up theme object for prettier plots: bottom legend, larger black text,
# white facet strips and panels with dotted grey gridlines
# NOTE(review): `panel.margin.y` is the pre-ggplot2-2.0 spelling of
# `panel.spacing.y` — confirm it is still accepted by the installed version
theme_jim <- theme(legend.position = "bottom",
                   axis.text.y = element_text(size = 14, colour = "black"),
                   axis.text.x = element_text(size = 14, colour = "black"),
                   legend.text = element_text(size = 14),
                   legend.title = element_text(size = 14),
                   title = element_text(size = 16),
                   strip.text = element_text(size = 14, colour = "black"),
                   strip.background = element_rect(fill = "white"),
                   panel.grid.minor.x = element_blank(),
                   panel.grid.major.x = element_line(colour = "grey", linetype = "dotted"),
                   panel.grid.minor.y = element_line(colour = "lightgrey", linetype = "dotted"),
                   panel.grid.major.y = element_line(colour = "grey", linetype = "dotted"),
                   panel.margin.y = unit(0.1, units = "in"),
                   panel.background = element_rect(fill = "white", colour = "lightgrey"),
                   panel.border = element_rect(colour = "black", fill = NA))
# Source the data from the database
# NOTE(review): the Postgres password was previously hard-coded here. It is
# now read from the PGPASSWORD environment variable, falling back to the old
# value so existing set-ups keep working — credentials should not live in
# source control; remove the fallback once the environment variable is set.
if (Sys.info()[1] == "Linux") {
  db <- src_postgres("lsca")
} else {
  db <- src_postgres("lsca", user = "postgres",
                     password = Sys.getenv("PGPASSWORD", unset = "gy!be1989"))
}
# Convert a string to "proper case": upper-case the first character and
# lower-case everything after it. For multi-word strings only the first
# word's initial is capitalised. Vectorised over x.
toproper <- function(x) {
  first_char <- toupper(substring(x, 1, 1))
  remainder <- tolower(substring(x, 2))
  paste0(first_char, remainder)
}
# Set lm tidy coef names
# Column headers reused below when rendering broom::tidy() output with kable()
tidy_names <- c("Term", "Estimate", "Std. Error", "t-Stat.", "p-Value")
# Column headers for tidied Ljung-Box test output
lb_names <- c("Test stat.", "p-Value", "Parameter")
The data were loaded in to a Postgresql database for storage and handling.
# Load packages for reading data
library(readr)
library(readxl)
# Read in the raw extracts; the two transaction tables have their date
# columns parsed to Date objects up front
ingredients <- read_csv("./data/hw/ingredients.csv")
menu_items <- read_csv("./data/hw/menu_items.csv")
menuitem <- read_csv("./data/hw/menuitem.csv") %>% mutate(date = lubridate::ymd(date))
portion_uom_types <- read_csv("./data/hw/portion_uom_types.csv")
pos_ordersale <- read_csv("./data/hw/pos_ordersale.csv") %>% mutate(date = lubridate::ymd(date))
recipe_ingredient_assignments <- read_csv("./data/hw/recipe_ingredient_assignments.csv")
recipe_sub_recipe_assignments <- read_csv("./data/hw/recipe_sub_recipe_assignments.csv")
recipes <- read_csv("./data/hw/recipes.csv")
# Store metadata arrives as an Excel workbook rather than CSV
store_restaurant <- read_excel("./data/hw/store_restaurant.xlsx")
sub_recipe_ingr_assignments <- read_csv("./data/hw/sub_recipe_ingr_assignments.csv")
sub_recipes <- read_csv("./data/hw/sub_recipes.csv")
# Set all field names to lowercase for easier typing later
# (looped over the table names rather than repeating the identical statement
# eleven times)
raw_table_names <- c("ingredients", "menu_items", "menuitem",
                     "portion_uom_types", "pos_ordersale",
                     "recipe_ingredient_assignments",
                     "recipe_sub_recipe_assignments", "recipes",
                     "store_restaurant", "sub_recipe_ingr_assignments",
                     "sub_recipes")
for (tbl_name in raw_table_names) {
  tbl_df <- get(tbl_name)
  colnames(tbl_df) <- tolower(colnames(tbl_df))
  assign(tbl_name, tbl_df)
}
# Load the data to the database as persistent (non-temporary) tables
# The return value (a remote tbl reference) is assigned to tmp only so the
# chunk does not print anything
tmp <- copy_to(db, ingredients, temporary = FALSE)
tmp <- copy_to(db, menu_items, temporary = FALSE)
tmp <- copy_to(db, menuitem, temporary = FALSE)
tmp <- copy_to(db, portion_uom_types, temporary = FALSE)
tmp <- copy_to(db, pos_ordersale, temporary = FALSE)
tmp <- copy_to(db, recipe_ingredient_assignments, temporary = FALSE)
tmp <- copy_to(db, recipe_sub_recipe_assignments, temporary = FALSE)
tmp <- copy_to(db, recipes, temporary = FALSE)
tmp <- copy_to(db, store_restaurant, temporary = FALSE)
tmp <- copy_to(db, sub_recipe_ingr_assignments, temporary = FALSE)
tmp <- copy_to(db, sub_recipes, temporary = FALSE)
After loading the data to the database, R scripts were run to connect to the database and make it available in R.
# Set up sql command to get all relevant tables
# N.b. this assigns over dplyr's sql() function for the rest of the chunk;
# the binding is removed again below
sql <- sql("select * from information_schema.tables where table_schema = 'public'")
# Get the set of table names in the public schema
tbls <- tbl(db, sql) %>% collect() %>% .$table_name
# Loop over table names, creating a lazy tbl reference for each in the
# global environment (named after the table)
for(i in seq_along(tbls)) {
  assign(tbls[i], tbl(db, tbls[i]))
}
# Clean up helper objects
rm(sql, tbls, i)
As sales, recipes, sub-recipes and ingredients were all stored in separate tables, several steps were required to create a view of the demand for lettuce on a single day.
Firstly, all recipes containing lettuce were found from the data, discarding other recipes (as they were not relevant). The amount of lettuce required for each recipe was found. As some recipes listed the amount in ounces and others in grams, the amount required was standardised to be measured in grams. This produced a clean data set that listed the total amounts of lettuce required in each recipe.
# Per-recipe lettuce and tomato requirements, standardised to grams
tom_let_recipes <- recipes %>%
  # Attach each recipe's ingredients and their unit-of-measure codes
  left_join(recipe_ingredient_assignments) %>%
  left_join(ingredients) %>%
  select(recipeid,
         ingredientid,
         quantity,
         ingredientname,
         portionuomtypeid) %>%
  left_join(portion_uom_types) %>%
  # Pull the joined result out of the database in to R
  collect() %>%
  # Flag tomato/lettuce ingredients by name
  mutate(tom = ifelse(grepl("tomato|Tomato", ingredientname), 1, 0),
         let = ifelse(grepl("lettuce|Lettuce", ingredientname), 1, 0)) %>%
  filter(tom == 1 | let == 1) %>%
  select(recipeid,
         ingredientname,
         quantity,
         portiontypedescription,
         tom,
         let) %>%
  # Standardise quantities to grams (28.3495 g per ounce)
  # NOTE(review): assumes every non-"Gram" unit here is ounces — confirm
  mutate(quantity_clean = ifelse(portiontypedescription == "Gram",
                                 quantity,
                                 28.3495 * quantity),
         # An ingredient matching both patterns would be labelled "tomato"
         ingredient = ifelse(tom == 1, "tomato", "lettuce")) %>%
  select(recipeid, ingredient, quantity_clean) %>%
  # Reshape to one row per recipe with lettuce/tomato columns
  spread(ingredient, quantity_clean) %>%
  # Recipes using only one of the two ingredients get 0 for the other
  mutate(lettuce = ifelse(is.na(lettuce), 0, lettuce),
         tomato = ifelse(is.na(tomato), 0, tomato))
A similar process was performed for all sub-recipes containing lettuce. The amount of lettuce required for each sub-recipe was found and standardised to grams.
# Per-sub-recipe lettuce and tomato requirements, standardised to grams
# (mirrors the recipe-level pipeline above, keyed on subrecipeid)
tom_let_sub_recipes <- sub_recipes %>%
  left_join(sub_recipe_ingr_assignments) %>%
  left_join(ingredients) %>%
  select(subrecipeid,
         ingredientid,
         quantity,
         ingredientname,
         portionuomtypeid) %>%
  left_join(portion_uom_types) %>%
  collect() %>%
  # Flag tomato/lettuce ingredients by name
  mutate(tom = ifelse(grepl("tomato|Tomato", ingredientname), 1, 0),
         let = ifelse(grepl("lettuce|Lettuce", ingredientname), 1, 0)) %>%
  filter(tom == 1 | let == 1) %>%
  select(subrecipeid,
         ingredientname,
         quantity,
         portiontypedescription,
         tom,
         let) %>%
  # Standardise to grams (28.3495 g per ounce; assumes non-gram units are oz)
  mutate(quantity_clean = ifelse(portiontypedescription == "Gram",
                                 quantity,
                                 28.3495 * quantity),
         ingredient = ifelse(tom == 1, "tomato", "lettuce")) %>%
  select(subrecipeid, ingredient, quantity_clean) %>%
  spread(ingredient, quantity_clean) %>%
  mutate(lettuce = ifelse(is.na(lettuce), 0, lettuce),
         tomato = ifelse(is.na(tomato), 0, tomato))
The linkage between recipe and sub-recipes was then used to calculate, for each recipe, the total amounts of lettuce required by the sub-recipe(s) beneath it.
# For each recipe, the total lettuce/tomato contributed by its sub-recipes,
# scaled by `factor` (how many times the sub-recipe is used in the recipe)
recipe_to_sub_tom_let <- recipe_sub_recipe_assignments %>%
  select(recipeid, subrecipeid, factor) %>%
  # copy = T copies the local sub-recipe totals in to the database for the join
  inner_join(tom_let_sub_recipes, copy = T) %>%
  mutate(lettuce = lettuce * factor,
         tomato = tomato * factor) %>%
  select(recipeid, subrecipeid, lettuce, tomato) %>%
  rename(sub_let = lettuce,
         sub_tom = tomato) %>%
  collect() %>%
  # Sum over all sub-recipes beneath each recipe
  group_by(recipeid) %>%
  summarise(sub_let = sum(sub_let, na.rm = T),
            sub_tom = sum(sub_tom, na.rm = T))
The linkage between recipe and sub-recipe was then used to calculate the total lettuce requirements for each recipe, i.e. combining the requirements of each recipe, as well as the requirements of any sub-recipes beneath each recipe. These data can be seen in table one.
# Combine each recipe's own requirements with those of its sub-recipes.
# full_join keeps recipes appearing in only one of the two tables; the
# row-wise sums then treat the missing side as zero via na.rm = T.
recipes_to_tom_let <- tom_let_recipes %>%
  full_join(recipe_to_sub_tom_let) %>%
  select(recipeid, lettuce, tomato, sub_let, sub_tom) %>%
  rowwise() %>%
  mutate(total_lettuce = sum(lettuce, sub_let, na.rm = T),
         total_tomato = sum(tomato, sub_tom, na.rm = T)) %>%
  select(recipeid, total_lettuce) %>%
  # Only recipes that actually use lettuce are relevant downstream
  filter(total_lettuce > 0) %>%
  collect() %>%
  # Attach recipe names/descriptions for presentation
  left_join(recipes, copy = T) %>%
  select(recipeid, recipename, recipedescription, total_lettuce)
# Caption typo fixed: "requiement" -> "requirement"
DT::datatable(recipes_to_tom_let,
              colnames = c("Recipe ID", "Recipe name", "Description", "Lettuce (g)"),
              caption = "Table 1: Total lettuce requirements in grams for all recipes with at least some requirement for lettuce.", rownames = FALSE)
The lettuce requirements for each menu item were then determined, based on the recipe(s) requirements for each item.
# Attach per-recipe lettuce totals to the menu items; the inner join keeps
# only menu items whose recipe has a lettuce requirement
menu_items_to_tom_let <- inner_join(collect(menu_items), recipes_to_tom_let)
The linkage of orders to menu items was then used to determine the lettuce requirements for each order, based on the items selected in each order, and the number of each item selected (for example, lettuce requirements would double for the same item ordered twice within one order).
# Use menuitem to link ordersale md5 IDs to total lettuce and tomato requirements
let_tom_reqs <- menuitem %>%
  select(md5key_ordersale, quantity, plu, id, date) %>%
  collect() %>%
  # Keep only line items whose (plu, menu item id) pair has a lettuce requirement
  inner_join(menu_items_to_tom_let, by = c("plu" = "plu",
                                           "id" = "menuitemid")) %>%
  # Scale the per-item requirement by the quantity ordered
  mutate(lettuce = quantity * total_lettuce) %>%
  select(md5key_ordersale, lettuce) %>%
  # Total lettuce per order
  group_by(md5key_ordersale) %>%
  summarise(lettuce = sum(lettuce))
# Link lettuce and tomato requirements to sales
# (orders containing no lettuce-bearing items drop out in the inner join)
sales_to_demand <- inner_join(collect(pos_ordersale), let_tom_reqs)
The daily demand of lettuce at each store was then determined via a simple summarisation process.
# Summarise by store and date to create the daily demand series
daily_demand <- sales_to_demand %>%
  group_by(storenumber, date) %>%
  summarise(lettuce = sum(lettuce)) %>%
  mutate(store = as.character(storenumber)) %>%
  left_join(store_restaurant, by = c("storenumber" = "store_number"), copy = T) %>%
  # The store metadata fields carry surrounding whitespace in the source data
  mutate(store_address1 = stringr::str_trim(store_address1),
         store_city = stringr::str_trim(store_city),
         store_state = stringr::str_trim(store_state)) %>%
  # N.b. the data are still grouped by storenumber at this point, so select()
  # silently retains storenumber as a grouping variable — this is relied upon
  # later, where the test set drops it with select(-storenumber)
  select(store, store_state, store_city, date, lettuce) %>%
  ungroup()
The daily demand of lettuce at each of the four restaurants in the data was visualised in order to understand the structure of the data, in order that sensible forecasting methods could be applied.
Figure one shows that there does not appear to be a strong trend in either the Californian or the New York restaurants. Restaurant 20974 initially shows very low demand, before picking up to show demand comparable to the other stores. There is clearly some seasonality in the demand within Californian restaurants which appears to be at a weekly level. Such a strong pattern of seasonality is not observed for the restaurants in New York.
# Plot daily lettuce demand per store, with the train/test split marked
daily_demand %>%
  ggplot(aes(x = date, y = lettuce, colour = store_state)) +
  geom_line(size = 1.25) +
  scale_x_date(date_breaks = "2 week") +
  scale_color_brewer(type = "qual", palette = "Dark2") +
  facet_grid(store ~ .) +
  guides(colour = guide_legend(title = "Store state")) +
  xlab("") +
  ylab("Demand for lettuce (g)") +
  theme_jim +
  scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_text(angle = 45,
                                   vjust = 0.5,
                                   hjust = .5)) +
  # Dashed line at the train/test cut-off, 14 days before the end of the data.
  # Previously a hard-coded day number (16587, i.e. 2015-06-01); now derived
  # from the data so it stays correct if the data change.
  geom_vline(xintercept = as.numeric(max(daily_demand$date) - 14),
             linetype = "dashed", colour = "darkgrey")
Figure 1: Daily demand for lettuce, split by restaurant. The dashed grey line shows the train/test split point for fitting models (the last two weeks of the data).
In order to compare forecasting methods it was necessary to select some data from the time series for testing/evaluation purposes. It was decided that the last two weeks of data from each series would be used to form a testing set of data. The dashed grey lines in figure one show this cut-off of training and testing data.
Using the final two-week period of each series for testing forecasting approaches, the data were split in to a train and test set prior to fitting any models. The data were nested by store in order to make fitting multiple time series models more straightforward.
# Split in to train and test
# N.b. filter() runs before the grouping, so max(date) is taken over the whole
# data set and all stores share the same cut-off date
train <- daily_demand %>%
  filter(date <= max(date) - days(14)) %>%
  group_by(store, store_state, store_city) %>%
  # Nest each store's remaining columns in to a ts_data list-column
  # (`.key` is the pre-tidyr-1.0 interface — confirm against installed version)
  nest(.key = ts_data)
# Last 2 weeks = test
test <- daily_demand %>%
  filter(date > max(date) - days(14)) %>%
  group_by(store, store_state, store_city) %>%
  nest(.key = ts_data)
The lettuce daily demand data for each store was converted in to an R timeseries object in order to fit the forecasting models.
# Convert one store's nested demand data frame in to an R time-series object.
# Daily demand is modelled with frequency = 7 (weekly seasonality), following
# http://robjhyndman.com/hyndsight/dailydata/ by R. Hyndman (author of forecast!)
make_ts <- function(df) {
  # One seasonal period = 7 daily observations
  ts(df$lettuce, frequency = 7)
}
# Use some functional programming on the nested data to convert it to a timeseries
# (adds a ts_ts list-column holding one ts object per store)
train <- train %>%
  mutate(ts_ts = ts_data %>% map(make_ts))
test <- test %>%
  mutate(ts_ts = ts_data %>% map(make_ts))
Exponential smoothing state space models (Hyndman et. al., 2002) were applied to the time series using the ets function. Despite the absence of obvious trends in the exploratory visualisations (figure one), the function was applied in a fully automated manner. This was done in order for potential trends to be identified by the model, should they exist in the data.
# Function to fit an exponential smoothing state space model to a series.
# model = "ZZZ" lets ets() select the error, trend and seasonal components
# automatically (the original comment said "no trend", but "ZZZ" does allow
# a trend to be selected if one is supported by the data)
fit_ets <- function(ts_data) {
  ets(ts_data, model = "ZZZ")
}
# Apply the model to each store's timeseries
train <- train %>%
  mutate(ets = ts_ts %>% map(fit_ets))
The estimates from the ETS models were then used to specify a complete Holt-Winters model, using the HoltWinters function. In this way trends (and an estimation of the seasonal component) of each series detected by ets were passed to HoltWinters for refinement.
# Define function to run HoltWinters() seeded with the output of an ets fit.
# The logical alpha/beta/gamma arguments tell HoltWinters which components to
# fit; the numeric ets estimates seed the optimiser via optim.start.
fit_hw <- function(ts_data, ets_model) {
  # Starting level/trend from ets; missing components come back as NA,
  # which HoltWinters expects as NULL instead
  l <- ets_model$par["l"]
  if (is.na(l)) { l <- NULL }
  b <- ets_model$par["b"]
  if (is.na(b)) { b <- NULL }
  # For each smoothing parameter: record whether ets estimated it (so the
  # component is fitted at all), then substitute a default optimisation start
  # when it was absent. The scalar `ifelse(is.na(x), FALSE, TRUE)` calls have
  # been replaced with the equivalent `!is.na(x)`.
  alpha <- ets_model$par["alpha"]
  alpha_l <- !is.na(alpha) # Fit with alpha?
  if (is.na(alpha)) { alpha <- 0.3 } # Set default value if not viable
  beta <- ets_model$par["beta"]
  beta_l <- !is.na(beta) # Fit with beta?
  if (is.na(beta)) { beta <- 0.1 }
  gamma <- ets_model$par["gamma"]
  gamma_l <- !is.na(gamma) # Fit with gamma?
  if (is.na(gamma)) { gamma <- 0.1 }
  # Map the ets seasonality code to HoltWinters' seasonal argument.
  # "N" (none) maps to "additive", but gamma_l is FALSE in that case so the
  # seasonal component is not fitted anyway.
  seasonality <- ets_model$components[3]
  seasonality <- ifelse(seasonality == "A", "additive",
                        ifelse(seasonality == "N", "additive",
                               "multiplicative"))
  # Fit HW with the logical alpha/beta/gamma parameters and ets-derived starts
  hw <- HoltWinters(ts_data, alpha = alpha_l, beta = beta_l, gamma = gamma_l,
                    seasonal = seasonality,
                    start.periods = 2,
                    l.start = l,
                    b.start = b,
                    optim.start = c(alpha = alpha,
                                    beta = beta,
                                    gamma = gamma))
  return(hw)
}
# Apply the function to fit the HW models
train <- train %>%
  mutate(hw = ts_ts %>% map2(ets, fit_hw))
The estimation of the components (i.e. level, trend and seasonal), where applicable, along with the fitted values from each Holt-Winters model are displayed in the figures below. For neither Californian store was a trend detected, whereas both New York stores have a trend that varies over the series (something that was not expected based on the visualisations in figure one). Each figure also displays the raw demand data in the first panel. The fitted values show a good correspondence with the raw data.
Further details on the merits of the Holt-Winters model are discussed in the model comparison section.
# Create function to plot HW fitted model, using store and city name for labels
# Relies on globals: daily_demand (for the date axis), theme_jim and toproper.
plot_hw <- function(store, city, hw_object, ts) {
  # Create data from fitted part of the model - fortify for ggplot2 plotting
  # (fortify.zoo presumably comes from zoo, loaded via forecast — confirm)
  data <- hw_object$fitted %>% fortify.zoo()
  ts_data <- ts %>% fortify.zoo() %>% setNames(c("Index", "raw"))
  data <- data %>% left_join(ts_data)
  # Set tidier column names ("xhat" -> "Fitted", others title-cased)
  colnames(data) <- colnames(data) %>% toproper() %>% gsub("Xhat", "Fitted", .)
  # Replace the ts index with calendar dates spread over the full data range
  # NOTE(review): the fitted series is shorter than daily_demand (HW fitting
  # drops the first seasonal periods and the train set excludes the final
  # fortnight), so these date labels are approximate — confirm intended
  idx <- seq(ymd(min(daily_demand$date)), ymd(max(daily_demand$date)), length.out = nrow(data))
  data$Index <- idx
  # Create plotting data in format that ggplot2 likes (long, with NA rows
  # for absent components removed)
  data <- data %>%
    gather(field, value, -Index) %>%
    mutate(field = factor(field, levels = c("Raw", "Fitted", "Level", "Season", "Trend"))) %>%
    na.omit()
  # Create title for the plot
  title <- paste0("Holt-Winters model estimation for store ", store, " (", city, ")")
  # Make the plot: one facet per component plus the raw and fitted series
  data %>%
    ggplot(aes(x = Index, y = value, group = field, colour = field)) +
    geom_line(size = 1.5, aes(colour = field)) +
    scale_colour_brewer(palette = "Set1") +
    facet_grid(field ~ ., scales = "free_y") +
    scale_y_continuous(labels = scales::comma) +
    guides(colour = guide_legend(title = "")) +
    xlab("Time") +
    ylab("Value") +
    ggtitle(title) +
    theme_jim +
    theme(title = element_text(size = 14),
          axis.text.y = element_text(size = 12))
}
# Apply function to the data, storing one plot per store
train <- train %>%
  mutate(hw_plot = pmap(list(store, store_city, hw, ts_ts), plot_hw))
train$hw_plot[[1]]
Figure 2: Holt-Winters model estimates and fitted values
train$hw_plot[[2]]
Figure 3: Holt-Winters model estimates and fitted values
train$hw_plot[[3]]
Figure 4: Holt-Winters model estimates and fitted values
train$hw_plot[[4]]
Figure 5: Holt-Winters model estimates and fitted values
Prior to fitting an ARIMA model, each series had to be made stationary.
Inspection of the series in figure 1 indicated that the models already had approximately constant variance. In order to assess this analytically, the McLeod-Li test was applied. The average p-values for each model are displayed in table 2 below. N.b. the null hypothesis of the McLeod-Li test is that the series has constant variance (no ARCH effects), so the small p-values in table 2 formally reject constant variance; nevertheless, given the visual evidence in figure 1, no transformation (e.g. taking logarithms) was applied to the series.
# Run the McLeod-Li test on a series and return the mean p-value across lags
fit_ml <- function(ts_data) {
  ml_out <- TSA::McLeod.Li.test(y = ts_data, plot = F)["p.values"]
  mean(unlist(ml_out))
}
# Fit McLeod-Li to each series (one mean p-value per store)
train <- train %>% mutate(ml = ts_ts %>% map(fit_ml))
# Present results, rounded to 4 decimal places
train %>%
  select(store, store_city, store_state, ml) %>%
  mutate(ml = ml %>% map_dbl(round, 4)) %>%
  kable(col.names = c("Store", "City", "State", "McLeod-Li p-value"),
        caption = "Table 2: McLeod-Li p-values")
| Store | City | State | McLeod-Li p-value |
|---|---|---|---|
| 4904 | Berkeley | California | 0.0001 |
| 12631 | Ridgewood | New York | 0.0129 |
| 20974 | Elmhurst | New York | 0.0000 |
| 46673 | Berkeley | California | 0.0002 |
The appropriate number of differences required for each series to be made stationary were then found using the ndiffs function. A similar approach was taken to find the number of seasonal differences required, using the nsdiffs function. The results from these tests are displayed in table 3 below. KPSS, ADF, and PP refer to the KPSS, Augmented Dickey-Fuller and Phillips-Perron tests for simple differencing requirements. CH and OCSB refer to the Canova-Hansen and Osborn-Chui-Smith-Birchenhall tests for determining seasonal difference requirements.
train <- train %>%
  mutate(# Determine difference reqs via three unit-root tests at alpha = 0.01
         diffs_kpss = ts_ts %>% map_dbl(ndiffs, alpha = 0.01, test = "kpss"),
         diffs_adf = ts_ts %>% map_dbl(ndiffs, alpha = 0.01, test = "adf"),
         diffs_pp = ts_ts %>% map_dbl(ndiffs, alpha = 0.01, test = "pp"),
         # Determine seasonal difference reqs (Canova-Hansen and OCSB tests),
         # capped at two seasonal differences
         sdiffs_ch = ts_ts %>% map_dbl(nsdiffs, test = "ch", max.D = 2),
         sdiffs_ocsb = ts_ts %>% map_dbl(nsdiffs, test = "ocsb", max.D = 2))
# Present the per-store results
train %>%
  select(store, store_city, diffs_kpss, diffs_adf, diffs_pp, sdiffs_ch, sdiffs_ocsb) %>%
  kable(col.names = c("Store", "City", "KPSS", "ADF", "PP", "CH", "OCSB"),
        caption = "Table 3: Difference and seasonal difference requirements for each store's series")
| Store | City | KPSS | ADF | PP | CH | OCSB |
|---|---|---|---|---|---|---|
| 4904 | Berkeley | 0 | 0 | 0 | 0 | 1 |
| 12631 | Ridgewood | 0 | 0 | 0 | 0 | 0 |
| 20974 | Elmhurst | 0 | 1 | 0 | 0 | 0 |
| 46673 | Berkeley | 0 | 0 | 0 | 0 | 1 |
Given that the KPSS and PP methods indicated that no differencing was required for any of the four series and only one series (Elmhurst) is indicated to require differencing by the ADF test, no differencing was performed in order to make the series stationary.
However, given the strong pattern of seasonality observed in the Californian stores (figure one), the results from the OCSB method of detecting seasonal difference requirements were used to perform seasonal differencing.
The auto.arima function was used to fit an ARIMA model for each of the stores, using the differencing values identified above.
# Fit an ARIMA model with no simple differencing (d = 0, per table 3) and
# `diff` seasonal differences (the per-store OCSB result)
diff_ts_and_fit <- function(ts, diff) {
  fitted <- auto.arima(ts, d = 0, D = diff)
  return(fitted)
}
train <- train %>%
  mutate(arima = map2(ts_ts, sdiffs_ocsb, diff_ts_and_fit),
         # In-sample fitted values, used for the comparison plots below
         arima_fitted = arima %>% map(forecast::fitted.Arima))
The raw data were then compared to the fitted values from each ARIMA model to give an understanding of how well the models fit the data.
These comparisons were visualised and are presented in the figures below.
# Plot ARIMA fitted values against the raw demand series for one store.
# Relies on globals: daily_demand (for the date axis) and theme_jim.
plot_arima <- function(store, city, fitted, raw) {
  # Strip ts attributes so the series can sit in a plain data frame
  fitted <- fitted %>% as.numeric()
  raw <- raw %>% as.numeric()
  # Spread observations evenly over the full date range
  # (fixed a redundant duplicated assignment: `idx <- idx <- seq(...)`)
  # NOTE(review): the training series stops 14 days before max(date), so
  # these date labels are approximate — confirm intended
  idx <- seq(ymd(min(daily_demand$date)), ymd(max(daily_demand$date)), length.out = length(fitted))
  # Create plotting dataframe in long format with tidy series labels
  data <- data_frame(fitted = fitted, raw = raw, idx = idx) %>%
    gather(key, value, -idx) %>%
    mutate(key = ifelse(key == "fitted", "Fitted", "Raw"),
           key = factor(key, levels = c("Raw", "Fitted")))
  # Create title for the plot
  title <- paste0("ARIMA model estimation for store ", store, " (", city, ")")
  data %>%
    ggplot(aes(x = idx, y = value, colour = key, group = key)) +
    geom_line(size = 1.5) +
    scale_colour_brewer(palette = "Set1") +
    scale_y_continuous(labels = scales::comma) +
    guides(colour = guide_legend(title = "")) +
    xlab("Time") +
    ylab("Value") +
    ggtitle(title) +
    theme_jim +
    theme(title = element_text(size = 14),
          axis.text.y = element_text(size = 12))
}
# Build one fitted-vs-raw comparison plot per store
train <- train %>%
  mutate(arima_plot = pmap(list(store, store_city, arima_fitted, ts_ts), plot_arima))
The ARIMA models show good overlap with the raw demand data, with slightly better results for the two Berkeley (CA) stores. Store 12631 (Ridgewood, NY) shows a strong difference between the ARIMA-fitted and the real demand values.
Further details on the merits of the ARIMA models are discussed in the model comparison section.
train$arima_plot[[1]]
Figure 6: ARIMA model fitted values compared with raw demand.
train$arima_plot[[2]]
Figure 7: ARIMA model fitted values compared with raw demand.
train$arima_plot[[3]]
Figure 8: ARIMA model fitted values compared with raw demand.
train$arima_plot[[4]]
Figure 9: ARIMA model fitted values compared with raw demand.
There are two methods that can be applied when diagnosing the two modelling approaches. The first approach considers the models in isolation, and performs residual analysis to estimate model quality and goodness-of-fit. The second approach is to forecast using each model and compare the results with the test data, summarising differences using common error metrics. Both approaches have been taken and are discussed below.
A common method of evaluating a Holt-Winters model is to examine the residuals. In a good model fit, these should have a mean of 0 and display constant variance.
# Function to extract residuals
# Thin wrapper around residuals() so it can be mapped over model list-columns
extract_resid <- function(model) {
  residuals(model)
}
# Extract HW residuals in to a list-column (one residual series per store)
train <- train %>% mutate(hw_resid = hw %>% map(extract_resid))
After extracting the residuals from each model, the average value was computed and the residuals plotted out.
# Plot a model's residuals over time for one store, with a dashed zero line.
# Relies on globals: daily_demand (for the date axis) and theme_jim.
plot_resid <- function(resids, store, model) {
  # Strip ts attributes so the residuals can sit in a plain data frame
  resids <- resids %>% as.numeric()
  # Spread residuals evenly over the observed date range
  # (fixed a redundant duplicated assignment: `idx <- idx <- seq(...)`)
  # NOTE(review): the residual series may be shorter than the full date
  # range, so these date labels are approximate — confirm intended
  idx <- seq(ymd(min(daily_demand$date)), ymd(max(daily_demand$date)), length.out = length(resids))
  # Create plotting dataframe
  data <- data_frame(resids = resids, idx = idx)
  # Create title, naming the model type and store
  title <- paste("Residual plot for", model, "\n applied to store", store)
  data %>%
    ggplot(aes(x = idx, y = resids, group = 1)) +
    geom_hline(yintercept = 0, size = .5, linetype = "dashed") +
    geom_line(size = 1.5, colour = "steelblue") +
    theme_jim +
    xlab("Time") +
    ylab("Residual") +
    ggtitle(title) +
    scale_y_continuous(labels = scales::comma) +
    theme(title = element_text(size = 12))
}
# Build one residual plot per store for the Holt-Winters models
train <- train %>% mutate(hw_resid_plot = pmap(list(hw_resid, store, "Holt-Winters"), plot_resid))
In order to assess if the residuals from each model had constant variance, ACF plots were used, displaying the first 35 lags (5 weeks of data).
# ACF plot of model residuals up to `lag` lags (default 35 = 5 weeks of
# daily data), with a tick mark at every lag
plot_acf <- function(residuals, lag = 35) {
  acf_vals <- acf(residuals, lag.max = lag, plot = F)
  plot_title <- paste0("ACF plot of residuals\n up to lag ", lag, " (", lag/7, " weeks)")
  autoplot(acf_vals) +
    theme_jim +
    ggtitle(plot_title) +
    scale_x_continuous(breaks = seq(min(acf_vals$lag), max(acf_vals$lag), 1))
}
# Create ACF plots of the Holt-Winters residuals for each store
train <- train %>% mutate(hw_acf = hw_resid %>% map(plot_acf))
Ljung–Box tests were also used to quantify the presence of serial correlations in the residuals.
# Ljung-Box test for serial correlation in residuals up to `lag` lags,
# tidied to a one-row data frame for tabulation
fit_lb <- function(residuals, lag = 35) {
  lb_fit <- Box.test(residuals, lag = lag, type = "Ljung-Box")
  broom::tidy(lb_fit)
}
# Run the Ljung-Box test on each store's Holt-Winters residuals
train <- train %>% mutate(hw_lb = hw_resid %>% map(fit_lb))
The results from these diagnostic analyses show that the Holt-Winters models have not fit these data particularly well. The residuals from each model have non-zero means, and both the ACF plots and the Ljung-Box tests reveal that there is serial correlation in the residuals.
This means that the Holt-Winters models have not fully detected the underlying structure of the demand series and are not completely correct. Only the test-set error, however, will reveal if these models are useful in this context.
The average residual was -1.33. The plot of the residuals and the ACF plot are displayed in figure 10A and 10B below.
plot_grid(train$hw_resid_plot[[1]], train$hw_acf[[1]], align = "hv", labels = c("A", "B"), vjust = 1.2)
Figure 10: Residual and ACF plots
The results from the Ljung-Box test are presented in table 4. The significant p-value indicates the presence of serial correlation in the residuals, indicating a poor model fit.
# Caption typo fixed: "statitics" -> "statistics"
kable(train$hw_lb[[1]],
      col.names = lb_names,
      caption = "Table 4: Ljung-Box test statistics",
      digits = 3)
| Test stat. | p-Value | Parameter |
|---|---|---|
| 65.64 | 0.001 | 35 |
The average residual was -9.45. The plot of the residuals and the ACF plot are displayed in figure 11A and 11B below.
plot_grid(train$hw_resid_plot[[2]], train$hw_acf[[2]], align = "hv", labels = c("A", "B"), vjust = 1.2)
Figure 11: Residual and ACF plots
The results from the Ljung-Box test are presented in table 5. The significant p-value indicates the presence of serial correlation in the residuals, indicating a poor model fit.
# Caption typo fixed: "statitics" -> "statistics"
kable(train$hw_lb[[2]],
      col.names = lb_names,
      caption = "Table 5: Ljung-Box test statistics",
      digits = 3)
| Test stat. | p-Value | Parameter |
|---|---|---|
| 97.339 | 0 | 35 |
The average residual was -7.98. The plot of the residuals and the ACF plot are displayed in figure 12A and 12B below.
plot_grid(train$hw_resid_plot[[3]], train$hw_acf[[3]], align = "hv", labels = c("A", "B"), vjust = 1.2)
Figure 12: Residual and ACF plots
The results from the Ljung-Box test are presented in table 6. The significant p-value indicates the presence of serial correlation in the residuals, indicating a poor model fit.
# Caption typo fixed: "statitics" -> "statistics"
kable(train$hw_lb[[3]],
      col.names = lb_names,
      caption = "Table 6: Ljung-Box test statistics",
      digits = 3)
| Test stat. | p-Value | Parameter |
|---|---|---|
| 166.211 | 0 | 35 |
The average residual was 6.85. The plot of the residuals and the ACF plot are displayed in figure 13A and 13B below.
plot_grid(train$hw_resid_plot[[4]], train$hw_acf[[4]], align = "hv", labels = c("A", "B"), vjust = 1.2)
Figure 13: Residual and ACF plots
The results from the Ljung-Box test are presented in table 7. The significant p-value indicates the presence of serial correlation in the residuals, indicating a poor model fit.
# Caption typo fixed: "statitics" -> "statistics"
kable(train$hw_lb[[4]],
      col.names = lb_names,
      caption = "Table 7: Ljung-Box test statistics",
      digits = 3)
| Test stat. | p-Value | Parameter |
|---|---|---|
| 410.766 | 0 | 35 |
As with the Holt-Winters models, residual analysis can be useful in understanding the goodness-of-fit for an ARIMA model. The residuals were obtained and plotted, and ACF plots and Ljung-Box tests used to assess the quality of each model.
# Repeat the residual diagnostics for the ARIMA models: residual series,
# residual plot, ACF plot and Ljung-Box test per store
train <- train %>% mutate(arima_resid = arima %>% map(extract_resid),
                          arima_resid_plot = pmap(list(arima_resid, store, "ARIMA"), plot_resid),
                          arima_acf = arima_resid %>% map(plot_acf),
                          arima_lb = arima_resid %>% map(fit_lb))
The ARIMA model residual analyses show much better goodness-of-fit measures than the Holt-Winters methods, especially for the Californian stores. The residuals for the Californian stores have a mean of 0 (not the case for the two New York stores where the fit is poorer). All four stores display residuals with no serial correlation (insignificant p-values for the Ljung-Box tests).
Overall these results indicate that the ARIMA models are better suited to fitting these time series. Comparisons of forecasting errors on the testing set provide further information on their suitability in this setting.
The average residual was 0.83. The plot of the residuals and the ACF plot are displayed in figure 14A and 14B below.
plot_grid(train$arima_resid_plot[[1]], train$arima_acf[[1]], align = "hv", labels = c("A", "B"))
Figure 14: Residual and ACF plots
The results from the Ljung-Box test are presented in table 8. The insignificant p-value indicates the absence of serial correlation in the residuals, indicating a good model fit.
kable(train$arima_lb[[1]],
col.names = lb_names,
caption = "Table 8: Ljung-Box test statistics",
digits = 3)
| Test stat. | p-Value | Parameter |
|---|---|---|
| 32.079 | 0.61 | 35 |
The average residual was 66.04. The plot of the residuals and the ACF plot are displayed in figure 15A and 15B below.
plot_grid(train$arima_resid_plot[[2]], train$arima_acf[[2]], align = "hv", labels = c("A", "B"))
Figure 15: Residual and ACF plots
The results from the Ljung-Box test are presented in table 9. The insignificant p-value indicates the absence of serial correlation in the residuals, indicating a good model fit.
kable(train$arima_lb[[2]],
col.names = lb_names,
caption = "Table 9: Ljung-Box test statistics",
digits = 3)
| Test stat. | p-Value | Parameter |
|---|---|---|
| 35.271 | 0.455 | 35 |
The average residual was 132.92. The plot of the residuals and the ACF plot are displayed in figure 16A and 16B below.
plot_grid(train$arima_resid_plot[[3]], train$arima_acf[[3]], align = "hv", labels = c("A", "B"))
Figure 16: Residual and ACF plots
The results from the Ljung-Box test are presented in table 10. The insignificant p-value indicates the absence of serial correlation in the residuals, indicating a good model fit.
# Ljung-Box test results for the third store's ARIMA residuals
lb_store_3 <- train$arima_lb[[3]]
kable(lb_store_3,
      col.names = lb_names,
      digits = 3,
      caption = "Table 10: Ljung-Box test statistics")
| Test stat. | p-Value | Parameter |
|---|---|---|
| 32.938 | 0.568 | 35 |
The average residual was -0.86. The plot of the residuals and the ACF plot are displayed in figure 17A and 17B below.
# Residual (A) and ACF (B) panels for the fourth store's ARIMA fit
plot_grid(train$arima_resid_plot[[4]], train$arima_acf[[4]], labels = c("A", "B"), align = "hv")
Figure 17: Residual and ACF plots
The results from the Ljung-Box test are presented in table 11. The insignificant p-value indicates the absence of serial correlation in the residuals, indicating a good model fit.
# Ljung-Box test results for the fourth store's ARIMA residuals
lb_store_4 <- train$arima_lb[[4]]
kable(lb_store_4,
      col.names = lb_names,
      digits = 3,
      caption = "Table 11: Ljung-Box test statistics")
| Test stat. | p-Value | Parameter |
|---|---|---|
| 32.562 | 0.586 | 35 |
Each model (i.e. one Holt-Winters and one ARIMA model per store) was used to forecast for the 14 days following the end of the training data.
# Extract point (mean) forecasts for the next `h` days from a fitted model.
#
# ts_model: a fitted time-series model accepted by forecast::forecast()
#           (e.g. the HoltWinters and Arima fits stored in `train`)
# h:        forecast horizon in days. Defaults to 14, the two-week horizon
#           required by the project brief, so existing calls are unchanged;
#           the horizon is now a parameter rather than a hard-coded constant.
#
# Returns a plain numeric vector of length `h` with the point forecasts
# (the `$mean` component, stripped of its ts attributes).
forecast_fortnight <- function(ts_model, h = 14) {
  forecast(ts_model, h = h)$mean %>% as.numeric()
}
# Produce 14-day-ahead forecasts from both fitted models for every store
train <- train %>%
  mutate(hw_forecast = map(hw, forecast_fortnight),
         arima_forecast = map(arima, forecast_fortnight))
These forecasts were then compared with the real demand values from the test data set.
# Un-nest the actual demand values from the test set and index each row by
# its day within the forecast window (1-14 per store)
test_actuals <- test %>%
  select(-ts_ts) %>%
  unnest() %>%
  select(-storenumber) %>%
  group_by(store, store_state, store_city) %>%
  mutate(forecast = row_number())

# Un-nest the forecasts, add the same day index, and join the actuals in.
# left_join() with no `by` is a natural join on all shared columns
# (the store identifiers plus the `forecast` day index).
forecasts <- train %>%
  select(store, store_state, store_city, hw_forecast, arima_forecast) %>%
  unnest() %>%
  group_by(store, store_state, store_city) %>%
  mutate(forecast = row_number()) %>%
  left_join(test_actuals) %>%
  mutate(hw_diff = hw_forecast - lettuce,
         arima_diff = arima_forecast - lettuce)
Figure 18 presents the results of the differences for each store and forecasting method. It is the case that ARIMA performs much better than Holt-Winters for both Californian and New York stores. Additionally, forecasts for Californian stores are much better than for New York stores when looking at either method. Forecasts are a little too high for Californian stores, and too low for New York stores.
# Boxplots of test-set forecast errors by store and method.
forecasts %>%
  ungroup() %>%
  select(store, store_state, forecast, hw_diff, arima_diff) %>%
  gather(method, diff, -c(store:forecast)) %>%
  mutate(method = ifelse(method == "arima_diff", "ARIMA", "Holt-Winters")) %>%
  ggplot(aes(x = store, y = diff, fill = store_state)) +
  geom_boxplot() +
  facet_grid(. ~ method) +
  scale_fill_brewer(palette = "Dark2") +
  xlab("Store") +
  ylab("Test-set forecast error") +
  guides(fill = guide_legend(title = "Store State")) +
  scale_y_continuous(labels = scales::comma) +
  # Zoom with coord_cartesian() rather than scale limits: setting `limits`
  # in scale_y_continuous() silently removes observations outside the range
  # BEFORE the boxplot statistics are computed, which distorts the boxes and
  # whiskers. coord_cartesian() zooms the view without dropping data.
  coord_cartesian(ylim = c(-10000, 1000)) +
  theme_jim
Figure 18: Forecasting errors for each store using both ARIMA and Holt-Winters methods.
In order to summarise the overall error for each model and each store, the mean absolute deviation (MAD) was calculated for each method. The MAD error metric was chosen as, in this setting, it is intuitive and describes how much the forecast demands were “out” from the true values in the same units as the true demand values are measured in.
# Reshape errors to long format: one row per store/method/forecast day
errors_long <- forecasts %>%
  ungroup() %>%
  select(store, store_state, forecast, hw_diff, arima_diff) %>%
  gather(method, diff, -c(store:forecast))

# Mean absolute deviation (MAD) per store and method, one column per method
mad <- errors_long %>%
  group_by(store, store_state, method) %>%
  summarise(mad = mean(abs(diff))) %>%
  spread(method, mad)
# MAD summary table; column order (ARIMA before Holt-Winters) comes from
# spread()'s alphabetical ordering of arima_diff/hw_diff upstream.
# Caption typo fixed: duplicated "the the" -> "the".
kable(mad,
      col.names = c("Store ID", "State", "ARIMA MAD", "Holt-Winters MAD"),
      digits = 1,
      caption = "Table 12: MAD Values for ARIMA and Holt-Winters forecasted values when compared to those in the testing set.")
| Store ID | State | ARIMA MAD | Holt-Winters MAD |
|---|---|---|---|
| 12631 | New York | 988.5 | 7201.5 |
| 20974 | New York | 1169.4 | 2849.4 |
| 46673 | California | 729.9 | 819.0 |
| 4904 | California | 1026.1 | 1232.9 |
These results are presented in table 12. Although there are differences between the two methods at a store level, overall ARIMA performs better than Holt-Winters, with an average MAD value of 978.5, compared to 3,025.7 for Holt-Winters.
In this setting this means that the ARIMA models were out by around 1,000 grams, whereas the Holt-Winters models were out by around 3,000 grams, a significantly weaker performance.
The results of both the residual diagnostics and the test-set forecast errors reveal that the ARIMA method is better than the Holt-Winters approach in this setting. As such, the ARIMA method will be used to perform the overall forecast for the two weeks following the end of the data.
Having determined that the ARIMA models are more suitable for fitting these time series, the train and testing data were re-combined and an ARIMA model re-fit for forecasting unobserved periods.
# Re-nest the raw daily demand data, this time without a train/test split,
# then build one time-series object per store
data <- daily_demand %>%
  group_by(store, store_state, store_city) %>%
  nest(.key = ts_data) %>%
  mutate(ts_ts = map(ts_data, make_ts))
The series were seasonally-differenced in the same manner as before (i.e. based on the output of the nsdiffs function) and an ARIMA model fit to each series.
# Determine differencing requirements for each store's series
data <- data %>%
  mutate(
    # Ordinary differencing order under three different unit-root tests
    diffs_kpss = map_dbl(ts_ts, ndiffs, alpha = 0.01, test = "kpss"),
    diffs_adf  = map_dbl(ts_ts, ndiffs, alpha = 0.01, test = "adf"),
    diffs_pp   = map_dbl(ts_ts, ndiffs, alpha = 0.01, test = "pp"),
    # Seasonal differencing order under two seasonality tests
    sdiffs_ch   = map_dbl(ts_ts, nsdiffs, test = "ch", max.D = 2),
    sdiffs_ocsb = map_dbl(ts_ts, nsdiffs, test = "ocsb", max.D = 2)
  )
# Seasonally difference (per the OCSB test result) and fit an ARIMA model
# to each store's full series
data <- mutate(data, arima = map2(ts_ts, sdiffs_ocsb, diff_ts_and_fit))
Forecasts for the two weeks following the end of the data were then made. These forecasts are visualised in figure 19 A, B, C and D.
# Forecast the next `h` days (with prediction intervals) for one store and
# return a ggplot visualisation of the forecast.
#
# ts_model: a fitted time-series model accepted by forecast::forecast()
# store:    store identifier, interpolated into the plot title
# h:        forecast horizon in days. Defaults to 14 (the two-week project
#           horizon) so existing calls are unchanged; the horizon is now a
#           parameter rather than a hard-coded constant.
#
# Returns a ggplot object (autoplot of the forecast, styled with theme_jim).
forecast_fortnight_plots <- function(ts_model, store, h = 14) {
  # Forecast h days ahead (point forecasts + prediction intervals)
  cast <- forecast(ts_model, h = h)
  # Set up title
  title <- paste("Forecast plot for demand of lettuce \n at store ", store)
  # Make the plot
  autoplot(cast) +
    theme_jim +
    scale_y_continuous(labels = scales::comma) +
    ylab("Demand") +
    xlab("Time") +
    ggtitle(title) +
    guides(fill = guide_legend(title = "Confidence level (%)"))
}
# Produce the final 14-day forecasts and one forecast plot per store
data <- data %>%
  mutate(arima_forecast = map(arima, forecast_fortnight),
         arima_forecast_plot = map2(arima, store, forecast_fortnight_plots))
# Arrange the four per-store forecast plots in a grid, labelled A-D
# (plotlist = passes the list of plots directly instead of one argument each)
plot_grid(plotlist = data$arima_forecast_plot[1:4],
          labels = LETTERS[1:4],
          vjust = 1.1)
Figure 19: Forecast plots for each store using ARIMA methods.
The point-estimate forecasts for each store are presented in table 13 below, and are submitted with this report as a standalone csv file.
# Un-nest the point forecasts and attach the calendar date of each
# forecast day (2015-06-16 to 2015-06-29, one row per day per store)
forecasts_dated <- data %>%
  select(store, store_state, store_city, arima_forecast) %>%
  unnest() %>%
  group_by(store, store_state, store_city) %>%
  mutate(date = seq(from = ymd("2015-06-16"),
                    to = ymd("2015-06-29"),
                    by = "day")) %>%
  ungroup()

# Replace store IDs with human-readable labels, round the forecasts, and
# reshape to one column per store with a leading Date column
forecast_tbl <- forecasts_dated %>%
  mutate(store = gsub("46673", "California 1 (ID:46673)", store),
         store = gsub("4904", "California 2 (ID:4904)", store),
         store = gsub("12631", "New York 1 (ID:12631)", store),
         store = gsub("20974", "New York 2 (ID:20974)", store)) %>%
  mutate(arima_forecast = round(arima_forecast, 2)) %>%
  select(store, date, arima_forecast) %>%
  spread(store, arima_forecast) %>%
  rename(Date = date)
# Persist the per-store forecast table to csv for submission alongside this
# report (assumes ./output/ already exists — TODO confirm; write_csv does not
# create directories)
readr::write_csv(forecast_tbl, "./output/CID01135629_Leach_Jim_Forecasts.csv")
# Render the final forecast table in the report
kable(forecast_tbl, caption = "Table 13: Forecast values for each store")
| Date | California 1 (ID:46673) | California 2 (ID:4904) | New York 1 (ID:12631) | New York 2 (ID:20974) |
|---|---|---|---|---|
| 2015-06-16 | 4255.78 | 10204.43 | 7353.34 | 6784.25 |
| 2015-06-17 | 3997.05 | 9870.59 | 7216.18 | 7170.98 |
| 2015-06-18 | 4391.50 | 8797.40 | 8530.56 | 7515.70 |
| 2015-06-19 | 3489.35 | 6331.46 | 8026.73 | 5886.42 |
| 2015-06-20 | 2984.78 | 6331.19 | 7177.95 | 5315.97 |
| 2015-06-21 | 5041.21 | 9874.37 | 7354.14 | 5787.92 |
| 2015-06-22 | 4790.64 | 9832.02 | 6740.89 | 6639.97 |
| 2015-06-23 | 3985.12 | 10316.20 | 7664.72 | 6473.02 |
| 2015-06-24 | 3868.30 | 9976.49 | 6941.75 | 6826.01 |
| 2015-06-25 | 4278.83 | 8897.74 | 8697.98 | 7140.23 |
| 2015-06-26 | 3737.71 | 6426.52 | 8566.72 | 5626.99 |
| 2015-06-27 | 3152.96 | 6421.26 | 7023.57 | 5094.17 |
| 2015-06-28 | 4418.59 | 9959.71 | 7257.91 | 5526.57 |
| 2015-06-29 | 4730.90 | 9912.87 | 6827.43 | 6311.00 |